import os
import pandas as pd
import requests
import json

def collect(start, type, filename, timeout=10):
    """Fetch one page of the Douban chart top list and append it to a file.

    The page is requested from ``https://movie.douban.com/j/chart/top_list``
    and, when non-empty, appended to *filename* as a JSON value followed by
    a single ``","`` separator.  The caller is expected to wrap the
    accumulated pages in ``[`` ... ``]`` and drop the trailing comma.

    Args:
        start: Offset sent as the ``start`` query parameter.
        type: Value sent as the ``type`` query parameter (name kept for
            backward compatibility even though it shadows the builtin).
        filename: Path of the file the page is appended to (UTF-8).
        timeout: Seconds to wait for the server before giving up.

    Returns:
        1 if a non-empty page was received and written, 0 if the response
        was empty (nothing is written in that case).

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection problems or timeout.
    """
    url = "https://movie.douban.com/j/chart/top_list"
    params = {
        'type': type,
        'interval_id': '100:90',
        'action': '',
        'start': start,
        'limit': '20',
    }
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36'
    }
    # A timeout keeps one stalled connection from hanging the whole crawl.
    resp = requests.get(url=url, params=params, headers=headers, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page as JSON.
    resp.raise_for_status()
    list_data = resp.json()

    if not list_data:
        print("爬取结束", start)
        return 0

    # The context manager guarantees the page is flushed and the handle is
    # closed before the caller re-opens the file to read it back.
    with open(filename, 'a', encoding="utf-8") as fp:
        json.dump(list_data, fp=fp, ensure_ascii=False)
        fp.write(",")
    print("爬取成功!", start)
    return 1

def mkdic(path):
    """Create directory *path* (including missing parents) if it is absent.

    Calling this on an already existing directory is a no-op.

    Args:
        path: Directory path to create.

    Raises:
        FileExistsError: If *path* exists but is not a directory.
    """
    # exist_ok=True avoids the check-then-create race of a separate
    # os.path.exists() test and still reports a non-directory at that path.
    os.makedirs(path, exist_ok=True)

if __name__ == "__main__":
    # Maps the value sent as the `type` query parameter to the genre name
    # that is used for the output file names.
    type_dict = {
        1: "纪录片", 2: "传记", 3: "犯罪", 4: "历史", 5: "动作",
        7: "歌舞", 8: "儿童片", 10: "悬疑", 11: "剧情", 12: "灾难片",
        13: "爱情", 14: "音乐", 15: "冒险", 16: "奇幻片", 17: "科幻片",
        18: "运动", 19: "惊悚", 20: "恐怖", 22: "战争", 23: "短片", 24: "喜剧",
        25: "动画", 27: "西部片", 28: "家庭", 29: "武侠", 30: "古装", 31: "黑色电影"
    }
    # Both output directories are loop-invariant, so create them once.
    mkdic("./爬取得到的json")
    mkdic("./爬取得到的表格文件")

    for type_id, value in type_dict.items():
        filename = "./爬取得到的json/" + value + ".json"

        # Start a fresh JSON array; collect() appends "<page>," per page.
        with open(filename, "w", encoding="utf-8") as fp:
            fp.write("[")

        # Page through the chart 20 entries at a time until collect()
        # reports an empty page.
        start = 0
        while collect(start, type_id, filename) != 0:
            start += 20

        with open(filename, "r", encoding="utf-8") as fp:
            data_str = fp.read()
        # Drop only the trailing separator. When no page was collected the
        # file holds just "[", which must become "[]" rather than "]".
        if data_str.endswith(","):
            data_str = data_str[:-1]
        data_str += "]"
        with open(filename, "w", encoding="utf-8") as fp:
            fp.write(data_str)

        # The file is a list of pages; flatten it into one list of entries.
        data_list = json.loads(data_str)
        data_list_new = [entry for page in data_list for entry in page]

        df = pd.DataFrame(data_list_new)
        # Write directly to the target path: the `encoding` argument of
        # to_excel() and ExcelWriter.save() no longer exist in pandas >= 2.0.
        df.to_excel("./爬取得到的表格文件/" + value + ".xlsx", index=False)







